# If running scripts one after the other, then should remove some elements because names are reused
# rm(list=ls())
Sys.setlocale("LC_CTYPE", "EN_US.UTF-8")
setwd("~/Dropbox/Dissertation/Ideological_Labels/Replication")
library(foreign)
library(broom)
options(scipen = 999)


## Read in Data
data <- read.dta("abs1_china2002.dta")

################################################################################################
############################ Recoding  Variables  ##############################################
################################################################################################

## Create ideology variables

# 我们一般以“左”“右”来区分人们的政治态度,如果用 1 代表最左,6 代表最右,您觉得下面 的个人或是党的态度属于哪种?
# 自己 			 1------2-------3-------4-------5-------6
# 中国共产党		 1------2-------3-------4-------5-------6

## This was a 1-7 scale, rather than 1-6 as in the released questionnaire. See email correspondence with Tianguang Meng, 4/7/18.

data$own.ideology <- data$v54a
data$own.ideology[data$v54a %in% 8:9] <- NA

data$ccp.ideology <- data$v54b
data$ccp.ideology[data$v54b %in% 8:9] <- NA

# 29. 有人说即使国家遇到困, 政府在处理国家大事时也应遵守法律; 也有人认为在特殊情况下政府可以不必理会法律上的规定, 您同意哪那种说法? 

data$abide.law[data$v29 == "SHOULD ALWAY OBEY LAW"] <- 1
data$abide.law[data$v29 == "MAY IGNORE LAW"] <- 0 
data$abide.law[data$v29 == "DON'T KNOW"] <- NA
data$abide.law[data$v29 == "NO ANSWER"] <- NA

# 43. 您认为选举对经济发展是有利还是不利？

data$elections.impact.econ[data$v43 == "HELPFUL"] <- 3
data$elections.impact.econ[data$v43 == "NO INFLUENCE"] <- 2
data$elections.impact.econ[data$v43 == "HARMFUL"] <- 1
data$elections.impact.econ[data$v43 == "DON'T KNOW"] <- NA
data$elections.impact.econ[data$v43 == "NO ANSWER"] <- NA
data$elections.impact.econ[data$v43 == "NOT APPLICALBE"] <- NA

# 49b. 法院在审判重大案件时, 应该接受当地政府的意见

data$court.gov.opinion[data$v49b == "STRONGLY AGREE"] <- 4
data$court.gov.opinion[data$v49b == "AGREE"] <- 3
data$court.gov.opinion[data$v49b == "DISAGREE"] <- 2
data$court.gov.opinion[data$v49b == "STRONGLY DISAGREE"] <- 1
data$court.gov.opinion[data$v49b == "DON'T KNOW"] <- NA
data$court.gov.opinion[data$v49b == "NO ANSWER"] <- NA

# 49c. 在我国,实行民主要依靠党的领导

data$democracy.under.ccp[data$v49c == "STRONGLY AGREE"] <- 4
data$democracy.under.ccp[data$v49c == "AGREE"] <- 3
data$democracy.under.ccp[data$v49c == "DISAGREE"] <- 2
data$democracy.under.ccp[data$v49c == "STRONGLY DISAGREE"] <- 1
data$democracy.under.ccp[data$v49c == "DON'T KNOW"] <- NA
data$democracy.under.ccp[data$v49c == "NO ANSWER"] <- NA

# 49d. 尽管我们国家的政治制度有这样那样的缺点,但它 还是最适合我国的国情

data$pol.system[data$v49d == "STRONGLY AGREE"] <- 4
data$pol.system[data$v49d == "AGREE"] <- 3
data$pol.system[data$v49d == "DISAGREE"] <- 2
data$pol.system[data$v49d == "STRONGLY DISAGREE"] <- 1
data$pol.system[data$v49d == "DON'T KNOW"] <- NA
data$pol.system[data$v49d == "NO ANSWER"] <- NA

# 49g. 对残暴的罪犯,应立即处罚,不必等待法院的复杂审判程序

data$summary.justice[data$v49g == "STRONGLY AGREE"] <- 4
data$summary.justice[data$v49g == "AGREE"] <- 3
data$summary.justice[data$v49g == "DISAGREE"] <- 2
data$summary.justice[data$v49g == "STRONGLY DISAGREE"] <- 1
data$summary.justice[data$v49g == "DON'T KNOW"] <- NA
data$summary.justice[data$v49g == "NO ANSWER"] <- NA

# 49h. 老百姓对政府的决定应该不计个人得失,全力支持

data$uncond.support[data$v49h == "STRONGLY AGREE"] <- 4
data$uncond.support[data$v49h == "AGREE"] <- 3
data$uncond.support[data$v49h == "DISAGREE"] <- 2
data$uncond.support[data$v49h == "STRONGLY DISAGREE"] <- 1
data$uncond.support[data$v49h == "DON'T KNOW"] <- NA
data$uncond.support[data$v49h == "NO ANSWER"] <- NA

# 49i. 政府官员所作的决策总是正确的

data$infalliable.officials[data$v49i == "STRONGLY AGREE"] <- 4
data$infalliable.officials[data$v49i == "AGREE"] <- 3
data$infalliable.officials[data$v49i == "DISAGREE"] <- 2
data$infalliable.officials[data$v49i == "STRONGLY DISAGREE"] <- 1
data$infalliable.officials[data$v49i == "DON'T KNOW"] <- NA
data$infalliable.officials[data$v49i == "NO ANSWER"] <- NA

# 49m. 我国的政治制度是世界上最好的

data$best.system[data$v49m == "STRONGLY AGREE"] <- 4
data$best.system[data$v49m == "AGREE"] <- 3
data$best.system[data$v49m == "DISAGREE"] <- 2
data$best.system[data$v49m == "STRONGLY DISAGREE"] <- 1
data$best.system[data$v49m == "DON'T KNOW"] <- NA
data$best.system[data$v49m == "NO ANSWER"] <- NA

# 49o. 在一个地方上,如果东一个团体,西一个团体, 就会影响地方的安定与和谐

data$pluralist.chaos[data$v49o == "STRONGLY AGREE"] <- 4
data$pluralist.chaos[data$v49o == "AGREE"] <- 3
data$pluralist.chaos[data$v49o == "DISAGREE"] <- 2
data$pluralist.chaos[data$v49o == "STRONGLY DISAGREE"] <- 1
data$pluralist.chaos[data$v49o == "DON'T KNOW"] <- NA
data$pluralist.chaos[data$v49o == "NO ANSWER"] <- NA

# 49p. 大多数的政府官员都能牺牲个人利益为人民服务

data$selfless.officials[data$v49p == "STRONGLY AGREE"] <- 4
data$selfless.officials[data$v49p == "AGREE"] <- 3
data$selfless.officials[data$v49p == "DISAGREE"] <- 2
data$selfless.officials[data$v49p == "STRONGLY DISAGREE"] <- 1
data$selfless.officials[data$v49p == "DON'T KNOW"] <- NA
data$selfless.officials[data$v49p == "NO ANSWER"] <- NA

# 55. 您认为我国近年来政治改革的速度怎么样?

data$against.polreform[data$v55 == "TOO FAST"] <- 3
data$against.polreform[data$v55 == "JUST RIGHT"] <- 2
data$against.polreform[data$v55 == "TOO SLOW"] <- 1
data$against.polreform[data$v55 == "DON'T KNOW"] <- NA
data$against.polreform[data$v55 == "NO ANSWER"] <- NA

# 56. 您认为我国近年来社会变化的速度怎么样?

data$against.socialchange[data$v56 == "TOO FAST"] <- 3
data$against.socialchange[data$v56 == "JUST RIGHT"] <- 2
data$against.socialchange[data$v56 == "TOO SLOW"] <- 1
data$against.socialchange[data$v56 == "DON'T KNOW"] <- NA
data$against.socialchange[data$v56 == "NO ANSWER"] <- NA

# 57. 有人认为维持国家统一是至高无上的; 也有人认为如果部分地区选择分离, 就应该让他们独立。 请问您比较同意哪一个说法?

data$unity.key[data$v57 == "UNIFICATION IS  HIGHEST VALUE"] <- 1
data$unity.key[data$v57 == "INDEPENDENCE IF PEOPLE CHOOSE"] <- 0
data$unity.key[data$v57 == "DON'T KNOW"] <- NA
data$unity.key[data$v57 == "NO ANSWER"] <- NA
data$unity.key[data$v57 == "CAN'T ANSWER"] <- NA

# 58a. 目前在我国进一步扩大民主很有必要

data$expand.democracy[data$v58a == "STRONGLY AGREE"] <- 4
data$expand.democracy[data$v58a == "AGREE"] <- 3
data$expand.democracy[data$v58a == "DISAGREE"] <- 2
data$expand.democracy[data$v58a == "STRONGLY DISAGREE"] <- 1
data$expand.democracy[data$v58a == "DON'T KNOW"] <- NA
data$expand.democracy[data$v58a == "NO ANSWER"] <- NA

# 58c. 国家领导人在报纸和电视上讲话时, 通常讲的是真话

data$truthful.officials[data$v58c == "STRONGLY AGREE"] <- 4
data$truthful.officials[data$v58c == "AGREE"] <- 3
data$truthful.officials[data$v58c == "DISAGREE"] <- 2
data$truthful.officials[data$v58c == "STRONGLY DISAGREE"] <- 1
data$truthful.officials[data$v58c == "DON'T KNOW"] <- NA
data$truthful.officials[data$v58c == "NO ANSWER"] <- NA

# 58g. 国家是一个大家庭,纵然是少数民族,也不可要求脱离

data$against.secession[data$v58g == "STRONGLY AGREE"] <- 4
data$against.secession[data$v58g == "AGREE"] <- 3
data$against.secession[data$v58g == "DISAGREE"] <- 2
data$against.secession[data$v58g == "STRONGLY DISAGREE"] <- 1
data$against.secession[data$v58g == "DON'T KNOW"] <- NA
data$against.secession[data$v58g == "NO ANSWER"] <- NA

# 58m. 收回中国历史上的失土是所有中国人神圣的使命 NOTE - RESPONSES FOR THIS QUESTION DON'T APPEAR IN THE DATASET

# data$recover.territory[data$v58m == "STRONGLY AGREE"] <- 4
# data$recover.territory[data$v58m == "AGREE"] <- 3
# data$recover.territory[data$v58m == "DISAGREE"] <- 2
# data$recover.territory[data$v58m == "STRONGLY DISAGREE"] <- 1
# data$recover.territory[data$v58m == "DON'T KNOW"] <- NA
# data$recover.territory[data$v58m == "NO ANSWER"] <- NA

# 58o. 国家操纵在少数有权人的手中,一般民众无法过问

data$elite.domination[data$v58o == "STRONGLY AGREE"] <- 4
data$elite.domination[data$v58o == "AGREE"] <- 3
data$elite.domination[data$v58o == "DISAGREE"] <- 2
data$elite.domination[data$v58o == "STRONGLY DISAGREE"] <- 1
data$elite.domination[data$v58o == "DON'T KNOW"] <- NA
data$elite.domination[data$v58o == "NO ANSWER"] <- NA

# 58r. 大型国有企业,仍应维持国有,不宜私有化

data$soe.control[data$v58r == "STRONGLY AGREE"] <- 4
data$soe.control[data$v58r == "AGREE"] <- 3
data$soe.control[data$v58r == "DISAGREE"] <- 2
data$soe.control[data$v58r == "STRONGLY DISAGREE"] <- 1
data$soe.control[data$v58r == "DON'T KNOW"] <- NA
data$soe.control[data$v58r == "NO ANSWER"] <- NA

# 58s. 中央对地方政府的决策,应比现在具有更大的监督权

data$oversee.local[data$v58s == "STRONGLY AGREE"] <- 4
data$oversee.local[data$v58s == "AGREE"] <- 3
data$oversee.local[data$v58s == "DISAGREE"] <- 2
data$oversee.local[data$v58s == "STRONGLY DISAGREE"] <- 1
data$oversee.local[data$v58s == "DON'T KNOW"] <- NA
data$oversee.local[data$v58s == "NO ANSWER"] <- NA

# 58t. 我们应该维护自己的生活方式,以免逐渐变成其它国家的翻版

data$chinese.excep[data$v58t == "STRONGLY AGREE"] <- 4
data$chinese.excep[data$v58t == "AGREE"] <- 3
data$chinese.excep[data$v58t == "DISAGREE"] <- 2
data$chinese.excep[data$v58t == "STRONGLY DISAGREE"] <- 1
data$chinese.excep[data$v58t == "DON'T KNOW"] <- NA
data$chinese.excep[data$v58t == "NO ANSWER"] <- NA

# 58u. 为了国家,个人的利益都可以牺牲

data$sacrifice.individual[data$v58u == "STRONGLY AGREE"] <- 4
data$sacrifice.individual[data$v58u == "AGREE"] <- 3
data$sacrifice.individual[data$v58u == "DISAGREE"] <- 2
data$sacrifice.individual[data$v58u == "STRONGLY DISAGREE"] <- 1
data$sacrifice.individual[data$v58u == "DON'T KNOW"] <- NA
data$sacrifice.individual[data$v58u == "NO ANSWER"] <- NA

# 58v. 在政治上为了达成重要目标,不必太计较用什么手段

data$any.means[data$v58v == "STRONGLY AGREE"] <- 4
data$any.means[data$v58v == "AGREE"] <- 3
data$any.means[data$v58v == "DISAGREE"] <- 2
data$any.means[data$v58v == "STRONGLY DISAGREE"] <- 1
data$any.means[data$v58v == "DON'T KNOW"] <- NA
data$any.means[data$v58v == "NO ANSWER"] <- NA

# 58w. 为了贯彻自己的政治理想,就不能太容忍那些持反对意 见的人

data$ignore.opposition[data$v58w == "STRONGLY AGREE"] <- 4
data$ignore.opposition[data$v58w == "AGREE"] <- 3
data$ignore.opposition[data$v58w == "DISAGREE"] <- 2
data$ignore.opposition[data$v58w == "STRONGLY DISAGREE"] <- 1
data$ignore.opposition[data$v58w == "DON'T KNOW"] <- NA
data$ignore.opposition[data$v58w == "NO ANSWER"] <- NA

# 58y. 只要获得多数人的支持,一位政治人物就应贯彻他的主张,不必顾及少数人的看法

data$ignore.minority[data$v58y == "STRONGLY AGREE"] <- 4
data$ignore.minority[data$v58y == "AGREE"] <- 3
data$ignore.minority[data$v58y == "DISAGREE"] <- 2
data$ignore.minority[data$v58y == "STRONGLY DISAGREE"] <- 1
data$ignore.minority[data$v58y == "DON'T KNOW"] <- NA
data$ignore.minority[data$v58y == "NO ANSWER"] <- NA

# 66. 说到一般的穷人,您认为他们的穷主要是自己造成的,还是因为社会的原因?

data$individual.responsibility[data$v66 == "THEIR OWN RESPONSIBILITY"] <- 3
data$individual.responsibility[data$v66 == "JOINT"] <- 2
data$individual.responsibility[data$v66 == "RESPONSIBILITY OF SOCIETY"] <- 1
data$individual.responsibility[data$v66 == "DON'T KNOW"] <- NA
data$individual.responsibility[data$v66 == "NO ANSWER"] <- NA

# 67. 有人认为,对个人的合法收入不论多少,都不应该加以限制;也有人认为,对少数收入特别高 的给予一定限制是必要的,您的意见是什么?

data$restrict.income[data$v67 == "CERTAIN RESTRICTION"] <- 1 
data$restrict.income[data$v67 == "NO ANY RESTRICTION"] <- 0 
data$restrict.income[data$v67 == "DON'T KNOW"] <- NA
data$restrict.income[data$v67 == "NO ANSWER"] <- NA
data$restrict.income[data$v67 == "NOT APPLICABLE"] <- NA

# 68. 有人认为,不仅要进行经济改革,而且应加速政治改革;也有人认为,政治改革会带来不稳定, 目前不宜进行,您的意见是什么?

data$no.polreform[data$v68 == "NO POLITICAL REFORM"] <- 1
data$no.polreform[data$v68 == "SPEED UP POLITICAL REFORM"] <- 0 
data$no.polreform[data$v68 == "DON'T KNOW"] <- NA
data$no.polreform[data$v68 == "NO ANSWER"] <- NA

# 69. 如果拿现在和改革开放前相比(1979 年以前比),您觉得哪些方面比以前进步, 哪些方面不如以 前?
# 69a. 经济发展

data$econ.improved <- data$v69a
data$econ.improved[data$v69a %in% 7:9] <- NA

# 69b. 贪污腐败

data$corruption.improved <- data$v69b
data$corruption.improved[data$v69b %in% 7:9] <- NA

# 69c. 贫富差距

data$inequality.improved <- data$v69c
data$inequality.improved[data$v69c %in% 7:9] <- NA

# 69d. 自由表达自己的思想

data$freedom.of.speech.improved <- data$v69d
data$freedom.of.speech.improved[data$v69d %in% 7:9] <- NA

# 69e. 每一个人都可以受到政府的公平对待

data$equal.protection.improved <- data$v69e
data$equal.protection.improved[data$v69e %in% 7:9] <- NA

# 69f. 像我这样的人也可以影响到政府的政策

data$efficacy.improved <- data$v69f
data$efficacy.improved[data$v69f %in% 7:9] <- NA

# 69g. 社会治安

data$order.improved <- data$v69g
data$order.improved[data$v69g %in% 7:9] <- NA

# 69h. 司法独立不受政治干预

data$legal.indep.improved <- data$v69h
data$legal.indep.improved[data$v69h %in% 7:9] <- NA

# 69i. 自由参加任何组织

data$free.association.improved <- data$v69i
data$free.association.improved[data$v69i %in% 7:9] <- NA

# 69j. 自由参加宗教活动(自由选择宗教信仰)

data$free.religion.improved <- data$v69j
data$free.religion.improved[data$v69j %in% 7:9] <- NA

# 69k. 爱搬到那住就把搬到那住

data$free.residence.improved <- data$v69k
data$free.residence.improved[data$v69k %in% 7:9] <- NA

# 69l. 人人都有基本的生活保障

data$living.allowance.improved <- data$v69l
data$living.allowance.improved[data$v69l %in% 7:9] <- NA

# 69m. 个人的政治权利

data$personal.pol.rights.improved <- data$v69m
data$personal.pol.rights.improved[data$v69m %in% 7:9] <- NA

# 70. 对于下面提到的一些问题,有些人认为政府应该负主要责任,也有些人认为个人应该负主要责 任,请问您认为应该由谁来负责? 
# 70a. 就业

data$employment.responsibility[data$v70a == "TOTALLY INDIVIDUAL RESPONSIBILITY"] <- 5
data$employment.responsibility[data$v70a == "INDIVIDUAL RESPONSIBILITY"] <- 4
data$employment.responsibility[data$v70a == "JOINT"] <- 3
data$employment.responsibility[data$v70a == "MAINLY GOVERN'T RESPONSIBILITY"] <- 2
data$employment.responsibility[data$v70a == "TOTALLY GOVERN'T RESPONSIBILITY"] <- 1
data$employment.responsibility[data$v70a == "DON'T KNOW"] <- NA
data$employment.responsibility[data$v70a == "NO ANSWER"] <- NA

# 70b. 住房

data$housing.responsibility[data$v70b == "TOTALLY INDIVIDUAL RESPONSIBILITY"] <- 5
data$housing.responsibility[data$v70b == "INDIVIDUAL RESPONSIBILITY"] <- 4
data$housing.responsibility[data$v70b == "JOINT"] <- 3
data$housing.responsibility[data$v70b == "MAINLY GOVERN'T RESPONSIBILITY"] <- 2
data$housing.responsibility[data$v70b == "TOTALLY GOVERN'T RESPONSIBILITY"] <- 1
data$housing.responsibility[data$v70b == "DON'T KNOW"] <- NA
data$housing.responsibility[data$v70b == "NO ANSWER"] <- NA

# 70c. 医疗

data$medical.responsibility[data$v70c == "TOTALLY INDIVIDUAL RESPONSIBILITY"] <- 5
data$medical.responsibility[data$v70c == "INDIVIDUAL RESPONSIBILITY"] <- 4
data$medical.responsibility[data$v70c == "JOINT"] <- 3
data$medical.responsibility[data$v70c == "MAINLY GOVERN'T RESPONSIBILITY"] <- 2
data$medical.responsibility[data$v70c == "TOTALLY GOVERN'T RESPONSIBILITY"] <- 1
data$medical.responsibility[data$v70c == "DON'T KNOW"] <- NA
data$medical.responsibility[data$v70c == "NO ANSWER"] <- NA

# 70d. 基础教育

data$education.responsibility[data$v70d == "TOTALLY INDIVIDUAL RESPONSIBILITY"] <- 5
data$education.responsibility[data$v70d == "INDIVIDUAL RESPONSIBILITY"] <- 4
data$education.responsibility[data$v70d == "JOINT"] <- 3
data$education.responsibility[data$v70d == "MAINLY GOVERN'T RESPONSIBILITY"] <- 2
data$education.responsibility[data$v70d == "TOTALLY GOVERN'T RESPONSIBILITY"] <- 1
data$education.responsibility[data$v70d == "DON'T KNOW"] <- NA
data$education.responsibility[data$v70d == "NO ANSWER"] <- NA

# 70e. 退休后的生活保障

data$retirement.responsibility[data$v70e == "TOTALLY INDIVIDUAL RESPONSIBILITY"] <- 5
data$retirement.responsibility[data$v70e == "INDIVIDUAL RESPONSIBILITY"] <- 4
data$retirement.responsibility[data$v70e == "JOINT"] <- 3
data$retirement.responsibility[data$v70e == "MAINLY GOVERN'T RESPONSIBILITY"] <- 2
data$retirement.responsibility[data$v70e == "TOTALLY GOVERN'T RESPONSIBILITY"] <- 1
data$retirement.responsibility[data$v70e == "DON'T KNOW"] <- NA
data$retirement.responsibility[data$v70e == "NO ANSWER"] <- NA

# 71. 您喜不喜欢下面这些国家?
# 71a. 俄罗斯

data$like.russia[data$v71a == "VERY LIKE"] <- 4
data$like.russia[data$v71a == "LIKE"] <- 3
data$like.russia[data$v71a == "DO NOT LIKE"] <- 2
data$like.russia[data$v71a == "NOT LIKE AT ALL"] <- 1
data$like.russia[data$v71a == "DON'T KNOW"] <- NA
data$like.russia[data$v71a == "NO ANSWER"] <- NA

# 71b. 美国

data$like.usa[data$v71b == "VERY LIKE"] <- 4
data$like.usa[data$v71b == "LIKE"] <- 3
data$like.usa[data$v71b == "DO NOT LIKE"] <- 2
data$like.usa[data$v71b == "NOT LIKE AT ALL"] <- 1
data$like.usa[data$v71b == "DON'T KNOW"] <- NA
data$like.usa[data$v71b == "NO ANSWER"] <- NA

# 71c. 日本

data$like.japan[data$v71c == "VERY LIKE"] <- 4
data$like.japan[data$v71c == "LIKE"] <- 3
data$like.japan[data$v71c == "DO NOT LIKE"] <- 2
data$like.japan[data$v71c == "NOT LIKE AT ALL"] <- 1
data$like.japan[data$v71c == "DON'T KNOW"] <- NA
data$like.japan[data$v71c == "NO ANSWER"] <- NA

# 71d. 德国

data$like.germany[data$v71d == "VERY LIKE"] <- 4
data$like.germany[data$v71d == "LIKE"] <- 3
data$like.germany[data$v71d == "DO NOT LIKE"] <- 2
data$like.germany[data$v71d == "NOT LIKE AT ALL"] <- 1
data$like.germany[data$v71d == "DON'T KNOW"] <- NA
data$like.germany[data$v71d == "NO ANSWER"] <- NA

# 71e. 韩国

data$like.southkorea[data$v71e == "VERY LIKE"] <- 4
data$like.southkorea[data$v71e == "LIKE"] <- 3
data$like.southkorea[data$v71e == "DO NOT LIKE"] <- 2
data$like.southkorea[data$v71e == "NOT LIKE AT ALL"] <- 1
data$like.southkorea[data$v71e == "DON'T KNOW"] <- NA
data$like.southkorea[data$v71e == "NO ANSWER"] <- NA

# 71f. 法国

data$like.france[data$v71f == "VERY LIKE"] <- 4
data$like.france[data$v71f == "LIKE"] <- 3
data$like.france[data$v71f == "DO NOT LIKE"] <- 2
data$like.france[data$v71f == "NOT LIKE AT ALL"] <- 1
data$like.france[data$v71f == "DON'T KNOW"] <- NA
data$like.france[data$v71f == "NO ANSWER"] <- NA

# 71g. 英国

data$like.uk[data$v71g == "VERY LIKE"] <- 4
data$like.uk[data$v71g == "LIKE"] <- 3
data$like.uk[data$v71g == "DO NOT LIKE"] <- 2
data$like.uk[data$v71g == "NOT LIKE AT ALL"] <- 1
data$like.uk[data$v71g == "DON'T KNOW"] <- NA
data$like.uk[data$v71g == "NO ANSWER"] <- NA

# 71h. 印度

data$like.india[data$v71h == "VERY LIKE"] <- 4
data$like.india[data$v71h == "LIKE"] <- 3
data$like.india[data$v71h == "DO NOT LIKE"] <- 2
data$like.india[data$v71h == "NOT LIKE AT ALL"] <- 1
data$like.india[data$v71h == "DON'T KNOW"] <- NA
data$like.india[data$v71h == "NO ANSWER"] <- NA

# 74c. 父母的要求即使不合理,子女仍应该照着去做

data$obey.parents[data$v74c == "STRONGLY AGREE"] <- 4
data$obey.parents[data$v74c == "AGREE"] <- 3
data$obey.parents[data$v74c == "DISAGREE"] <- 2
data$obey.parents[data$v74c == "STRONGLY DISAGREE"] <- 1
data$obey.parents[data$v74c == "DON'T KNOW"] <- NA
data$obey.parents[data$v74c == "NO ANSWER"] <- NA

# 74f. 婆媳闹矛盾时,即使婆婆不对,做丈夫的也应该劝妻子听 婆婆的话


data$obey.motherinlaw[data$v74f == "STRONGLY AGREE"] <- 4
data$obey.motherinlaw[data$v74f == "AGREE"] <- 3
data$obey.motherinlaw[data$v74f == "DISAGREE"] <- 2
data$obey.motherinlaw[data$v74f == "STRONGLY DISAGREE"] <- 1
data$obey.motherinlaw[data$v74f == "DON'T KNOW"] <- NA
data$obey.motherinlaw[data$v74f == "NO ANSWER"] <- NA

# 74m. 一种意见能否在社会上流传,应由政府决定

data$info.control[data$v74m == "STRONGLY AGREE"] <- 4
data$info.control[data$v74m == "AGREE"] <- 3
data$info.control[data$v74m == "DISAGREE"] <- 2
data$info.control[data$v74m == "STRONGLY DISAGREE"] <- 1
data$info.control[data$v74m == "DON'T KNOW"] <- NA
data$info.control[data$v74m == "NO ANSWER"] <- NA

# 74p. 如果只要生一个孩子, 生儿子比生女儿好

data$prefer.sons[data$v74p == "STRONGLY AGREE"] <- 4
data$prefer.sons[data$v74p == "AGREE"] <- 3
data$prefer.sons[data$v74p == "DISAGREE"] <- 2
data$prefer.sons[data$v74p == "STRONGLY DISAGREE"] <- 1
data$prefer.sons[data$v74p == "DON'T KNOW"] <- NA
data$prefer.sons[data$v74p == "NO ANSWER"] <- NA

# 76. 有人认为即使会导致不平等, 国家也应该放手让每一个人赚钱;也有人认为即使可能养懒人, 国家也应该设法减低不平等, 您的意见是什么?

data$reduce.inequality[data$v76 == 2] <- 1
data$reduce.inequality[data$v76 == 1] <- 0 
data$reduce.inequality[data$v76 == 3] <- NA
data$reduce.inequality[data$v76 == 8] <- NA
data$reduce.inequality[data$v76 == 9] <- NA

# 78. 有人认为即使会引起社会不安,也应该赋予人们言论自由;也有人认为即使需要对于人民表达 自由加以限制, 也应该要保障社会安定。您的意见是什么?

data$speech.over.stability[data$v78 == "FREEDOM OF EXPRESSION"] <- 1
data$speech.over.stability[data$v78 == "STABILITY IS MORE IMPORTANT"] <- 0 
data$speech.over.stability[data$v78 == "DON'T KNOW"] <- NA
data$speech.over.stability[data$v78 == "NO ANSWER"] <- NA

# 79. 有人认为我国的选举应该普遍采取差额的办法;也有人认为为防止各种冲突,我们应该继续用 老办法选举。您的意见是什么?

data$multicandidate.elections[data$v79 == "MULTICANDIDATE ELECTION"] <- 1
data$multicandidate.elections[data$v79 == "OLD WAY OF ELECTION"] <- 0 
data$multicandidate.elections[data$v79 == "DON'T KNOW"] <- NA
data$multicandidate.elections[data$v79 == "NO ANSWER"] <- NA

# 80. 为解决我国面临的问题,有些人认为需要对于我国的政治制度做根本性的变革;也有人认为需 要对于我国的政治制度做一些调整;还有人认为没有必要进行政治制度方面的改变。您的意见 是什么?

data$fundamental.reform[data$v80 == "FOUNDAMENTAL CHANGE"] <- 3
data$fundamental.reform[data$v80 == "ADJUSTMENT"] <- 2 
data$fundamental.reform[data$v80 == "NO NEED FOR CHANGE"] <- 1 
data$fundamental.reform[data$v80 == "DON'T KNOW"] <- NA
data$fundamental.reform[data$v80 == "NO ANSWER"] <- NA

# i2. 请问下列说法,哪一个最符合您的看法?

data$democracy.better[data$i2 == "DEMOCRATIC SYSTEM BETTER"] <- 3
data$democracy.better[data$i2 == "SAME"] <- 2 
data$democracy.better[data$i2 == "AUTHORITARIAN GOVERNMENT MAY BETTER"] <- 1 
data$democracy.better[data$i2 == "DON'T KNOW"] <- NA
data$democracy.better[data$i2 == "NO ANSWER"] <- NA

# i3. 如果 1 代表完全不民主,10 代表完全民主,在程度上,分数越小越不民主,分数越大民主。请问 您认为:

# i3a. 79 年改革开放之前,中国民主程度属于哪 一种?

data$pre79.democracy <- data$i3a
data$pre79.democracy[data$i3a %in% 97:99] <- NA

# i3b. 90 年代初期,中国民主程度属于哪一种?

data$nineties.democracy <- data$i3b
data$nineties.democracy[data$i3b %in% 97:99] <- NA

# i3c. 中国目前的民主程度属于哪一种?

data$current.democracy <- data$i3c
data$current.democracy[data$i3c %in% 97:99] <- NA

# i3d. 中国目前应该有什么程度的民主?

data$ideal.democracy <- data$i3d
data$ideal.democracy[data$i3d %in% 97:99] <- NA

# i3e. 五年后中国的民主程度什么样?

data$in5years.democracy <- data$i3e
data$in5years.democracy[data$i3e %in% 97:99] <- NA

# i4. 如果 1 表示完全不适合,10 表示完全适合,在程度上,分数越小表示越不适合,分数越大表示越 适合。请问您觉得民主对中国的适合程度如何?

data$democracy.suitable <- data$i4
data$democracy.suitable[data$i4 %in% 98:99] <- NA

# i5. 有些人认为可以改用其它的方法来治理我们的国家,下面列出四种方法,请问您的看法如何?对 这些方法,您是非常同意,同意,不同意,还是非常不同意?

# i5a. 让人民透过选举,来决定谁来领导国家

data$elect.leaders[data$i5a == "STRONGLY AGREE"] <- 4
data$elect.leaders[data$i5a == "AGREE"] <- 3
data$elect.leaders[data$i5a == "DISAGREE"] <- 2
data$elect.leaders[data$i5a == "STRONGLY DISAGREE"] <- 1
data$elect.leaders[data$i5a == "DON'T KNOW"] <- NA
data$elect.leaders[data$i5a == "NO ANSWER"] <- NA

# i5b. 废除人大和选举,由专家来决定一切

data$abolish.elections[data$i5b == "STRONGLY AGREE"] <- 4
data$abolish.elections[data$i5b == "AGREE"] <- 3
data$abolish.elections[data$i5b == "DISAGREE"] <- 2
data$abolish.elections[data$i5b == "STRONGLY DISAGREE"] <- 1
data$abolish.elections[data$i5b == "DON'T KNOW"] <- NA
data$abolish.elections[data$i5b == "NO ANSWER"] <- NA

# i5c. 允许不同的党派之间竞争,来决定谁来执政

data$multiparty.elections[data$i5c == "STRONGLY AGREE"] <- 4
data$multiparty.elections[data$i5c == "AGREE"] <- 3
data$multiparty.elections[data$i5c == "DISAGREE"] <- 2
data$multiparty.elections[data$i5c == "STRONGLY DISAGREE"] <- 1
data$multiparty.elections[data$i5c == "DON'T KNOW"] <- NA
data$multiparty.elections[data$i5c == "NO ANSWER"] <- NA

# i5d. 由军队来统治国家

data$military.rule[data$i5d == "STRONGLY AGREE"] <- 4
data$military.rule[data$i5d == "AGREE"] <- 3
data$military.rule[data$i5d == "DISAGREE"] <- 2
data$military.rule[data$i5d == "STRONGLY DISAGREE"] <- 1
data$military.rule[data$i5d == "DON'T KNOW"] <- NA
data$military.rule[data$i5d == "NO ANSWER"] <- NA

# i6. 如果您必须在民主与经济发展之间做一选择,请问您觉得哪一项比较重要?

data$democracy.over.development[data$i6 == "DEMOCRACY ABSOLUTELY IMPORTANT"] <- 5
data$democracy.over.development[data$i6 == "DEMOCRACY RELATIVE IMPORTANT"] <- 4
data$democracy.over.development[data$i6 == "EQUALLY IMPORTANT"] <- 3
data$democracy.over.development[data$i6 == "ECON DEV RELATIVELY IMPORTANT"] <- 2
data$democracy.over.development[data$i6 == "ECON DEV ABSOLUTE MORE IMPORTANT"] <- 1
data$democracy.over.development[data$i6 == "DON'T KNOW"] <- NA
data$democracy.over.development[data$i6 == "NO ANSWER"] <- NA

################################################################################################
################################# Recoding Demographic Variables  ##############################
################################################################################################

# Sex

data$female[data$v82 == "FEMALE"] <- 1
data$female[data$v82 == "MALE"] <- 0

# Age

data$years <- data$v83

# Education Level
# Note: 1 = 不识字或小学没毕业, 2 = 小学毕业, 3 = 中学毕业, 4 = 高中、中专、中技, 5 = 夜大、职大、电大、函大毕业或通过成人自学考试, 6 = 全日制大专、大学毕业, 7 = 研究生毕业

data$edulevel[data$v110 == "ILLITERATE"] <- 1
data$edulevel[data$v110 == "PRIMARY SCHOOL"] <- 2
data$edulevel[data$v110 == "MIDDLE SCHOOL"] <- 3
data$edulevel[data$v110 == "HIGH/TECHNICAL SCHOOL"] <- 4
data$edulevel[data$v110 == "EVENING COLLEGE"] <- 5
data$edulevel[data$v110 == "COLLEGE"] <- 6
data$edulevel[data$v110 == "GRADUATE SCHOOL"] <- 7
data$edulevel[data$v110 == "NO ANSWER"] <- NA

# Party Membership

data$ccp[data$v111 == "CCP"] <- 1
data$ccp[data$v111 %in% c("MASS", "COMMUNIST YOUTH LEAGUE", "DEMOCRATIC PARTY")] <- 0
data$ccp[data$v111 == "NO ANSWER"] <- NA


# Communist Youth League Membership

data$communist.youth.league[data$v111 == "COMMUNIST YOUTH LEAGUE"] <- 1
data$communist.youth.league[data$v111 %in% c("MASS", "CCP", "DEMOCRATIC PARTY")] <- 0
data$communist.youth.league[data$v111 == "NO ANSWER"] <- NA

# Applied to CCP [only applicable for party members]

data$applied.ccp[data$v112 == "YES"] <- 1
data$applied.ccp[data$v112 == "NO"] <- 0
data$applied.ccp[data$v112 %in% c("NOT APPLICABLE", "NO ANSWER")] <- NA

# Hukou Status

data$rural.hukou[data$v2 == "RURAL"] <- 1
data$rural.hukou[data$v2 == "URBAN"] <- 0
data$rural.hukou[data$v2 %in% c("NO REGISTRATION, NO ANSWER")] <- NA

# Have ever been a Migrant Worker? (if Rural Hukou)

data$migrant.worker[data$v3a == "YES"] <- 1
data$migrant.worker[data$v3a == "NO"] <- 0
data$migrant.worker[data$v3a %in% c("NOT APPLICABLE, NO ANSWER")] <- NA

# Total Personal Income - if doing analysis, look at this a bit more.

data$income <- data$v156a
data$income[data$v156a %in% c(99997:99999)] <- NA

# Political Knowledge

## 中国共产党总书记
data$pol.know1[data$v13a == 1] <- 1
data$pol.know1[data$v13a != 1] <- 0

## 我国总理

data$pol.know2[data$v13b == 1] <- 1
data$pol.know2[data$v13b != 1] <- 0

## 我国全国人大常委会委员长

data$pol.know3[data$v13c == 1] <- 1
data$pol.know3[data$v13c != 1] <- 0

## 美国总统

data$pol.know4[data$v13d == 1] <- 1
data$pol.know4[data$v13d != 1] <- 0

## 俄罗斯总统

data$pol.know5[data$v13e == 1] <- 1
data$pol.know5[data$v13e != 1] <- 0

## 香港特区行政长官

data$pol.know6[data$v13f == 1] <- 1
data$pol.know6[data$v13f != 1] <- 0

# 台湾“政府”最高首脑

data$pol.know7[data$v13g == 1] <- 1
data$pol.know7[data$v13g != 1] <- 0

# 我国国家主席

data$pol.know8[data$v13h == 1] <- 1
data$pol.know8[data$v13h != 1] <- 0

## Total number correct

data$pol.knowsum <- rowSums(cbind(data$pol.know1, data$pol.know2, data$pol.know3, data$pol.know4, data$pol.know5, data$pol.know6, data$pol.know7, data$pol.know8))

################################################################################################
################# Figures 1d and 1e: Graphic Representation of Responses #######################
################################################################################################

library(scales)

barplot <- ggplot(data, aes(x = own.ideology)) + 
  geom_bar(fill="slategray2") + 
  scale_x_continuous(breaks = pretty_breaks(n = 7), limits = c(0.5,7.5)) + 
  xlab("Self-Placement on the Left-Right Scale") + 
  ylab("Number of Respondents") + 
  theme_bw() + 
  annotate("text", label = "Nonresponse Rate:\n 42.3%", x = 6.5, y = 600, size = 6) +
  theme(text = element_text(size=16))

## This produces Figure 1d

pdf("2002_barplot.pdf", width = 9, height = 6)
barplot
dev.off()


barplot.ccp <- ggplot(data, aes(x = ccp.ideology)) +
  geom_bar(fill="slategray2") +
  scale_x_continuous(breaks = pretty_breaks(n = 7), limits = c(0.5,7.5)) +
  xlab("CCP Placement on the Left-Right Scale") +
  ylab("Number of Respondents") +
  theme_bw() +
  annotate("text", label = "Nonresponse Rate:\n 46.7%", x = 6.5, y = 500, size = 5.8) +
  theme(text = element_text(size=16)) 

## This produces Figure 1e

pdf("2002_ccp_barplot.pdf", width = 9, height = 6)
barplot.ccp
dev.off()

################################################################################################
#################### Correlation Analysis of Left-Right Placement  #############################
################################################################################################

## Some code is repeated with some variations in this section and the next to produce the equivalence analysis, which adds a dashed vertical line
## that denotes a negligible effect.

## List of questions used

predictorsList <- names(data[,c(547:574, 601:609, 615:620)])

## Create list of correlations

correlations <- Reduce(rbind, lapply(predictorsList, function(x){
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson"))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

## Create names for our questions and categorize by issue

questionLabels <- c("Government should always obey the law", "Elections help the economy", "Courts should consider local government's opinion", "CCP should lead implementation of democracy", "Our government fits China's circumstances", "Should not wait for trial to punish vicious crimes", "Should unconditionally support the government", "Government officials are infalliable", "China's political system is the best in the world", "Too many groups disrupt social harmony", "Most officials can sacrifice to serve the people", "The pace of political reform is too fast", "The pace of social change is too fast", "Maintaining national unity is paramount", "Our country should broaden democracy", "National leaders usually tell the truth", "Ethnic minorities should not ask for independence", "Elites dominate affairs, masses have no voice", "State owned enterprises should not be privatized", "Increase central oversight of local governments", "Should keep own way of life, avoid copying others", "Individual interests can be sacrificed for the state", "Can use any means to accomplish political goals", "Can ignore opposition to achieve political goals", "Leaders with majority support can ignore minority", "Poverty caused by individual choices, not society", "Government should restrict individual income", "China should not carry out political reform", "Children should obey unreasonable parents", "Husband should tell wife to listen to mother-in-law", "Government should control spread of information", "It is better to have a son than a daughter", "Government should reduce income inequality", "Protect free speech even if stability at risk", "Elections should have more candidates than seats", "Our political system needs fundamental change", "Democracy is always better than autocracy", "Democracy is suitable for China", "National leaders should be elected", "Abolish elections and people's congresses", "Multiple parties should contest elections", "The military should rule the country", "Democracy takes precedence over development")

issueType <- c("Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Social\nIssues", "Political\nIssues", "Political\nIssues", "Social\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Economic\nIssues", "Political\nIssues", "Political\nIssues", "Social\nIssues" ,"Political\nIssues", "Political\nIssues", "Political\nIssues", "Economic\nIssues", "Economic\nIssues", "Political\nIssues", "Social\nIssues", "Social\nIssues", "Political\nIssues", "Social\nIssues", "Economic\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues" ,"Political\nIssues" ,"Political\nIssues" ,"Political\nIssues" ,"Political\nIssues" ,"Political\nIssues")

# Create data frame of correlations and then order them

bivariateCorrs <- cbind(predictorsList, correlations, questionLabels, issueType)

orderedCorrs <- bivariateCorrs[order(bivariateCorrs$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs)[names(orderedCorrs) == 'predictorsList'] <- 'orderedPredictors'

# make the questionLabels an ordered factor for ggplot

orderedCorrs$questionLabels <- factor(orderedCorrs$questionLabels, levels = orderedCorrs$questionLabels[order(orderedCorrs$estimate)])

## This function creates the correlation plot of the questions

credplot.gg <- function(d){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d, aes(y=questionLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0) +
    geom_vline(xintercept = 0, linetype=2) +
    scale_x_continuous(breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.1), limits = c(-0.157, 0.131)) +
    xlab('Correlation with Left-Right Self-Identification') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

################################################################################################
#################### Figure 6: Equivalence Analysis of Left-Right Placement  ###################
################################################################################################

# cor(data$own.ideology, data$living.allowance.improved, method = "pearson", use = "complete.obs")
# 
# tidy(cor.test(data$own.ideology, data$elect.leaders, method = "pearson"))

correlations90 <- Reduce(rbind, lapply(predictorsList, function(x){
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson", conf.level = 0.9))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels

questionLabels <- c("Government should always obey the law", "Elections help the economy", "Courts should consider local government's opinion", "CCP should lead implementation of democracy", "Our government fits China's circumstances", "Should not wait for trial to punish vicious crimes", "Should unconditionally support the government", "Government officials are infalliable", "China's political system is the best in the world", "Too many groups disrupt social harmony", "Most officials can sacrifice to serve the people", "The pace of political reform is too fast", "The pace of social change is too fast", "Maintaining national unity is paramount", "Our country should broaden democracy", "National leaders usually tell the truth", "Ethnic minorities should not ask for independence", "Elites dominate affairs, masses have no voice", "State owned enterprises should not be privatized", "Increase central oversight of local governments", "Should keep own way of life, avoid copying others", "Individual interests can be sacrificed for the state", "Can use any means to accomplish political goals", "Can ignore opposition to achieve political goals", "Leaders with majority support can ignore minority", "Poverty caused by individual choices, not society", "Government should restrict individual income", "China should not carry out political reform", "Children should obey unreasonable parents", "Husband should tell wife to listen to mother-in-law", "Government should control spread of information", "It is better to have a son than a daughter", "Government should reduce income inequality", "Protect free speech even if stability at risk", "Elections should have more candidates than seats", "Our political system needs fundamental change", "Democracy is always better than autocracy", "Democracy is suitable for China", "National leaders should be elected", "Abolish elections and people's congresses", "Multiple parties should contest elections", "The military should rule the country", "Democracy takes precedence over development")



issueType <- c("Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Social\nIssues", "Political\nIssues", "Political\nIssues", "Social\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Economic\nIssues", "Political\nIssues", "Political\nIssues", "Social\nIssues" ,"Political\nIssues", "Political\nIssues", "Political\nIssues", "Economic\nIssues", "Economic\nIssues", "Political\nIssues", "Social\nIssues", "Social\nIssues", "Political\nIssues", "Social\nIssues", "Economic\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues" ,"Political\nIssues" ,"Political\nIssues" ,"Political\nIssues" ,"Political\nIssues" ,"Political\nIssues")


bivariateCorrs90 <- cbind(predictorsList, correlations90, questionLabels, issueType)

# ordered by estimate

orderedCorrs90 <- bivariateCorrs90[order(bivariateCorrs90$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs90)[names(orderedCorrs90) == 'predictorsList'] <- 'orderedPredictors'

# make the questionLabels an ordered factor for ggplot

orderedCorrs90$questionLabels <- factor(orderedCorrs90$questionLabels, levels = orderedCorrs90$questionLabels[order(orderedCorrs90$estimate)])

credplot.gg <- function(d1, d2){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d1, aes(y=questionLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0, size = 1) +
    geom_errorbarh(height = 0, data = d2, aes(xmin=conf.low, xmax=conf.high)) +
    geom_vline(xintercept = 0, linetype=5) +
    geom_vline(xintercept = -0.13, linetype=3) +
    geom_vline(xintercept = 0.13, linetype=3) +
    scale_x_continuous(breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15, 0.15), limits = c(-0.16, 0.16)) +
    xlab('Correlation with Left-Right Self-Identification') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

pdf("2002_L-R_Correlations90.pdf", width = 8.5, height = 11)
credplot.gg(orderedCorrs90, orderedCorrs)
dev.off()


################################################################################################
#################### Correlation Analysis of Partisan and Symbolic Issues  #####################
################################################################################################

## Some code is repeated with some variations in this section and the next to produce the equivalence analysis, which adds a dashed vertical line
## that denotes a negligible effect.

## Create list of partisan and symbolic variables

symbolicList.02 <- names(data[,c(546, 594, 595, 624)])

correlations.sym02 <- Reduce(rbind, lapply(symbolicList.02, function(x){
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson"))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels


symLabels <- c("CCP Left-Right Placement", "Like the United States", "Like Japan", "Communist Party Membership")


# Create vector of issue types

issueType <- c("2002\nSurvey","2002\nSurvey","2002\nSurvey","2002\nSurvey")

bivariateCorrs.sym02 <- cbind(symbolicList.02, correlations.sym02, symLabels, issueType)


# ordered by estimate

orderedCorrs.sym02 <- bivariateCorrs.sym02[order(bivariateCorrs.sym02$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs.sym02)[names(orderedCorrs.sym02) == 'symbolicList.02'] <- 'orderedPredictors'

# make the symLabels an ordered factor for ggplot

orderedCorrs.sym02$symLabels <- factor(orderedCorrs.sym02$symLabels, levels = orderedCorrs.sym02$symLabels[order(orderedCorrs.sym02$estimate)])

# Save our file so we can combine later
save(orderedCorrs.sym02, file = "symCorrs02.Rda")


credplot.gg <- function(d){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d, aes(y=symLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0) +
    geom_vline(xintercept = 0, linetype=2) +
    scale_x_continuous(breaks = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5), limits = c(-0.59, 0.59)) +
    xlab('Correlation with Left-Right Self-Identification') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

pdf("2002_L-R_correlations_sym.pdf", width = 8.5, height = 6)
credplot.gg(orderedCorrs.sym02)
dev.off()


################################################################################################
########### Bottom Panel of Figure 8: Equivalence Analysis of Partisan and Symbolic Issues  ####
################################################################################################

correlations.sym0290 <- Reduce(rbind, lapply(symbolicList.02, function(x){
  tidy(cor.test(data$own.ideology, data[[x]], method = "pearson", conf.level = 0.9))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Create vector of longer question labels


symLabels <- c("CCP Left-Right Placement", "Like the United States", "Like Japan", "Communist Party Membership")


# Create vector of issue types

issueType <- c("2002\nSurvey","2002\nSurvey","2002\nSurvey","2002\nSurvey")

bivariateCorrs.sym0290 <- cbind(symbolicList.02, correlations.sym0290, symLabels, issueType)


# ordered by estimate

orderedCorrs.sym0290 <- bivariateCorrs.sym0290[order(bivariateCorrs.sym0290$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs.sym0290)[names(orderedCorrs.sym0290) == 'symbolicList.02'] <- 'orderedPredictors'

# make the symLabels an ordered factor for ggplot

orderedCorrs.sym0290$symLabels <- factor(orderedCorrs.sym0290$symLabels, levels = orderedCorrs.sym0290$symLabels[order(orderedCorrs.sym0290$estimate)])

# Save our file so we can combine later
save(orderedCorrs.sym0290, file = "symCorrs0290.Rda")


credplot.gg <- function(d1, d2){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d1, aes(y=symLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point(size = 2.5) +
    geom_errorbarh(height = 0, size = 1) +
    geom_errorbarh(height = 0, data = d2, aes(xmin=conf.low, xmax=conf.high)) +
    geom_vline(xintercept = 0, linetype=5) +
    geom_vline(xintercept = -0.13, linetype=3) +
    geom_vline(xintercept = 0.13, linetype=3) +
    scale_x_continuous(breaks = c(-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5), limits = c(-0.59, 0.59)) +
    xlab('Correlation with Left-Right Self-Identification') + 
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() + 
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

## This creates the bottom panel of Figure 8
pdf("2002_L-R_correlations_sym90.pdf", width = 8.5, height = 6)
credplot.gg(orderedCorrs.sym0290, orderedCorrs.sym02)
dev.off()

################################################################################################
####################### Subgroup Analysis: High Political Knowledge  ###########################
################################################################################################

correlations.highinfo <- Reduce(rbind, lapply(predictorsList, function(x){
  tidy(cor.test(data[data$pol.knowsum %in% c("7", "8"), ]$own.ideology, data[data$pol.knowsum %in% c("7", "8"), ][[x]], method = "pearson"))[c("estimate", "conf.low", "conf.high", "statistic", "parameter")]
}))

# Have to re-specify this
issueType <- c("Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Social\nIssues", "Political\nIssues", "Political\nIssues", "Social\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Economic\nIssues", "Political\nIssues", "Political\nIssues", "Social\nIssues" ,"Political\nIssues", "Political\nIssues", "Political\nIssues", "Economic\nIssues", "Economic\nIssues", "Political\nIssues", "Social\nIssues", "Social\nIssues", "Political\nIssues", "Social\nIssues", "Economic\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues", "Political\nIssues" ,"Political\nIssues" ,"Political\nIssues" ,"Political\nIssues" ,"Political\nIssues" ,"Political\nIssues")

bivariateCorrs.hi <- cbind(predictorsList, correlations.highinfo, questionLabels, issueType)


# ordered by estimate

orderedCorrs.hi <- bivariateCorrs.hi[order(bivariateCorrs.hi$estimate), ]

# rename columns of names to orderedPredictors

names(orderedCorrs.hi)[names(orderedCorrs.hi) == 'predictorsList'] <- 'orderedPredictors'

# make the questionLabels an ordered factor for ggplot

orderedCorrs.hi$questionLabels <- factor(orderedCorrs.hi$questionLabels, levels = orderedCorrs.hi$questionLabels[order(orderedCorrs.hi$estimate)])


credplot.gg <- function(d){
  # d is a data frame with 4 columns
  # d$x gives variable names
  # d$y gives center point
  # d$ylo gives lower limits
  # d$yhi gives upper limits
  require(ggplot2)
  p <- ggplot(d, aes(y=questionLabels, x=estimate, xmin=conf.low, xmax=conf.high)) +
    geom_point() +
    geom_errorbarh(height = 0) +
    geom_vline(xintercept = 0, linetype=2) +
    scale_x_continuous(breaks = c(-0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15), limits = c(-0.195, 0.195)) +
    xlab('Correlation with Left-Right Self-Identification') +
    ylab('') +
    facet_grid(issueType ~ ., scales = "free", space = "free") +
    theme_bw() +
    theme(strip.text.y = element_text(angle = 0), text = element_text(size=12))
  return(p)
}

pdf("2002_L-R_Correlations_highinfo.pdf", width = 8.5, height = 7)
credplot.gg(orderedCorrs.hi)
dev.off()





################################################################################################
#### PCA- Appendix Figures A.4, panels b) and d), and Appendix Figure A.5, panels e) and f) ####
################################################################################################

completeIndices <- complete.cases(data[,predictorsList])

fullQDataFrame <- data[completeIndices,predictorsList]

idealFrame <- data[completeIndices, "own.ideology"]


data[completeIndices, "own.ideology"]

library(ade4)
library(factoextra)
library(magrittr)


out.dudi <- dudi.pca(fullQDataFrame, center = TRUE, scale = TRUE, scannf=FALSE, nf=25)

VarianceProp <- 100 * out.dudi$eig/sum(out.dudi$eig)

## Figure A.4 b): Some descriptive statistics of results

pdf("2002_pca_scree.pdf", width = 6, height = 4)
fviz_eig(out.dudi)
dev.off()

screeplot(out.dudi, main = "Screeplot - Eigenvalues")



## Figure A.4 d) - Question Loadings
pdf("2002_pca_vars.pdf", width = 9, height = 6)
fviz_pca_var(out.dudi, 
             axes = c(1,2),
             repel = TRUE     # Avoid text overlapping
)
dev.off()

idealPCAFrame <- cbind(idealFrame, out.dudi$li)
colnames(idealPCAFrame)[1] <- "own.ideology"

summary(lm(own.ideology ~ Axis1, data = idealPCAFrame))
summary(lm(own.ideology ~ Axis2, data = idealPCAFrame))
summary(lm(own.ideology ~ Axis3, data = idealPCAFrame))
summary(lm(own.ideology ~ Axis4, data = idealPCAFrame))

library("ggpubr")

## Figure A.5 e)
pdf("2002_pca_s1.pdf", width = 6, height = 4)
ggscatter(idealPCAFrame, x = "own.ideology", y = "Axis1", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Left-Right Self-Placement", ylab = "First Principal Component")
dev.off()

## Figure A.5 f)
pdf("2002_pca_s2.pdf", width = 6, height = 4)
ggscatter(idealPCAFrame, x = "own.ideology", y = "Axis2", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Left-Right Self-Placement", ylab = "Second Principal Component")
dev.off()